Daily Assignment 22

ESS 330

Author

Nina Hayford

Load Libraries

library(dataRetrieval)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(timetk)
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
✔ broom        1.0.7     ✔ rsample      1.2.1
✔ dials        1.4.0     ✔ tibble       3.2.1
✔ ggplot2      3.5.1     ✔ tidyr        1.3.1
✔ infer        1.0.7     ✔ tune         1.3.0
✔ modeldata    1.4.0     ✔ workflows    1.2.0
✔ parsnip      1.3.1     ✔ workflowsets 1.1.0
✔ purrr        1.0.4     ✔ yardstick    1.3.2
✔ recipes      1.2.1     
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ purrr::discard() masks scales::discard()
✖ dplyr::filter()  masks stats::filter()
✖ dplyr::lag()     masks stats::lag()
✖ recipes::step()  masks stats::step()
library(modeltime)
library(fable)
Loading required package: fabletools
Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr

Attaching package: 'fabletools'
The following object is masked from 'package:yardstick':

    accuracy
The following object is masked from 'package:parsnip':

    null_model
The following objects are masked from 'package:infer':

    generate, hypothesize
library(tsibble)

Attaching package: 'tsibble'
The following object is masked from 'package:lubridate':

    interval
The following objects are masked from 'package:base':

    intersect, setdiff, union
library(prophet)
Loading required package: Rcpp

Attaching package: 'Rcpp'
The following object is masked from 'package:rsample':

    populate
Loading required package: rlang

Attaching package: 'rlang'
The following objects are masked from 'package:purrr':

    %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
    flatten_raw, invoke, splice
library(ggplot2)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout

Get and Prep Data

# Daily discharge (param 00060, cfs) for USGS gage 06752260
# (Cache la Poudre at Fort Collins), 2013-2023, collapsed to monthly means.
poudre_flow <- readNWISdv(siteNumber = "06752260",
                          parameterCd = "00060",
                          startDate = "2013-01-01",
                          endDate = "2023-12-31") |>
  renameNWISColumns() |>
  mutate(Date = floor_date(Date, "month")) |>  # stamp each day with its month
  group_by(Date) |>
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2013-01-01&endDT=2023-12-31

Split Data and Build Models

# Time-ordered split: first 90% of the monthly record trains, last 10% tests.
splits <- initial_time_split(data = poudre_flow, prop = 0.9)

# Prophet model with yearly seasonality enabled; with monthly data prophet
# disables weekly/daily seasonality on its own (see messages below).
model_prophet <- prophet_reg(seasonality_yearly = TRUE) |>
  set_engine("prophet") |>
  fit(Flow ~ Date, data = training(splits))
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Auto-selected ARIMA benchmark, fit on the same training window.
model_arima <- arima_reg() |>
  set_engine("auto_arima") |>
  fit(Flow ~ Date, data = training(splits))
frequency = 12 observations per 1 year
# Register both fitted models in a single modeltime table for joint
# forecasting and comparison.
models_tbl <- modeltime_table(model_prophet, model_arima)

Forecast Next 12 Months

# Calibrate on the hold-out set before forecasting: modeltime_forecast() on
# an uncalibrated table cannot attach .conf_lo/.conf_hi, which is exactly the
# warning emitted below ("Proceeding with '.conf_interval_show = FALSE'").
calibration_tbl <- models_tbl %>%
  modeltime_calibrate(new_data = testing(splits))

# Forecast over the test window, carrying the full observed series for context.
forecast_tbl <- calibration_tbl %>%
  modeltime_forecast(
    new_data = testing(splits),
    actual_data = poudre_flow
  )
plot_modeltime_forecast(forecast_tbl)
Warning: ✖ Expecting the following names to be in the data frame: .conf_hi, .conf_lo.
ℹ Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.

Get Real Data for Comparison

# Observed 2024 daily discharge for the same gage, aggregated to monthly
# means so it lines up with the forecast grid.
obs_2024 <- readNWISdv(siteNumber = "06752260",
                       parameterCd = "00060",
                       startDate = "2024-01-01",
                       endDate = "2024-12-31") |>
  renameNWISColumns() |>
  mutate(Date = floor_date(Date, "month")) |>
  group_by(Date) |>
  summarise(Observed = mean(Flow, na.rm = TRUE), .groups = "drop")
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2024-01-01&endDT=2024-12-31

Refit the Models and Predict the Next 12 Months

# Extend the monthly series with 12 placeholder rows for 2024. Use NA_real_
# rather than logical NA so the Flow column is double like the historical
# values, instead of relying on bind_rows() type coercion.
future_tbl <- poudre_flow %>%
  bind_rows(
    tibble(Date = seq.Date(from = as.Date("2024-01-01"),
                           by = "month", length.out = 12),
           Flow = NA_real_)
  )

# Refit both models on the complete 2013-2023 record before projecting 2024.
refit_tbl <- modeltime_refit(models_tbl, data = poudre_flow)
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
frequency = 12 observations per 1 year
# Forecast over the extended frame, keep only the model predictions
# (.key == "prediction" drops the echoed actual rows), and add a plain
# Date column derived from .index for joining against observations.
pred_tbl <- refit_tbl %>%
  modeltime_forecast(
    new_data = future_tbl,
    actual_data = poudre_flow
  ) %>%
  filter(.key == "prediction") %>%
  mutate(Date = as.Date(.index))

# Peek at the prediction table
head(pred_tbl)
# Forecast Results
  
Conf Method: conformal_default | Conf Interval: | Conf By ID: FALSE (GLOBAL
CONFIDENCE)
# A tibble: 6 × 6
  .model_id .model_desc .key       .index     .value Date      
      <int> <chr>       <fct>      <date>      <dbl> <date>    
1         1 PROPHET     prediction 2013-01-01   170. 2013-01-01
2         1 PROPHET     prediction 2013-02-01   175. 2013-02-01
3         1 PROPHET     prediction 2013-03-01   174. 2013-03-01
4         1 PROPHET     prediction 2013-04-01   229. 2013-04-01
5         1 PROPHET     prediction 2013-05-01   988. 2013-05-01
6         1 PROPHET     prediction 2013-06-01  1122. 2013-06-01

Join Observed and Predicted

# Give the forecast column a descriptive name for the comparison step.
pred_tbl <- rename(pred_tbl, Predicted = .value)

# Attach the observed 2024 monthly means by month and keep only the months
# that actually have an observation.
comparison <- pred_tbl %>%
  left_join(obs_2024, by = "Date") %>%
  drop_na(Observed)

# Inspect the merged predicted/observed table
head(comparison)
# Forecast Results
  
Conf Method: conformal_default | Conf Interval: | Conf By ID: FALSE (GLOBAL
CONFIDENCE)
# A tibble: 6 × 7
  .model_id .model_desc .key       .index     Predicted Date       Observed
      <int> <chr>       <fct>      <date>         <dbl> <date>        <dbl>
1         1 PROPHET     prediction 2024-01-01     -91.7 2024-01-01     11.0
2         1 PROPHET     prediction 2024-02-01     -94.3 2024-02-01     24.5
3         1 PROPHET     prediction 2024-03-01     -82.0 2024-03-01     49.6
4         1 PROPHET     prediction 2024-04-01     -27.2 2024-04-01    104. 
5         1 PROPHET     prediction 2024-05-01     727.  2024-05-01    194. 
6         1 PROPHET     prediction 2024-06-01     852.  2024-06-01    383. 

Fit the Linear Model

# Fit the linear model
# Regress observed 2024 monthly flow on the forecast; a slope near 1 and an
# intercept near 0 would indicate an unbiased forecast.
lm_model <- lm(Observed ~ Predicted, data = comparison)

# Print the model summary
summary(lm_model)

Call:
lm(formula = Observed ~ Predicted, data = comparison)

Residuals:
    Min      1Q  Median      3Q     Max 
-76.483 -32.041  -2.972  30.753  73.597 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  43.0574     8.9601   4.805 8.45e-05 ***
Predicted     0.3132     0.0270  11.598 7.63e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 40.1 on 22 degrees of freedom
Multiple R-squared:  0.8594,    Adjusted R-squared:  0.8531 
F-statistic: 134.5 on 1 and 22 DF,  p-value: 7.625e-11

Compute R^2

# lm_model was already fit above with this exact formula and data —
# reuse it instead of refitting the identical model.
summary(lm_model)$r.squared
[1] 0.8594398

Plot Predicted vs Observed

# Scatter of observed vs predicted monthly flow. The red 1:1 line marks
# perfect agreement; the blue line is the fitted least-squares relationship.
ggplot(comparison, aes(x = Predicted, y = Observed)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle("Predicted vs Observed Streamflow") +
  xlab("Predicted Flow") +
  ylab("Observed Flow")
`geom_smooth()` using formula = 'y ~ x'